Data cleaning and auditing

Author

Florencia Grattarola

Published

April 14, 2025

library(httr)
library(jsonlite)
library(countrycode)
library(janitor)
library(readxl)
library(sf)
sf_use_s2(FALSE)
library(tmap)
tmap_mode('view')
library(tidyverse)
options(knitr.kable.NA = '')

Data cleaning

Read data

# Load the working copy of the metadata workbook. The large guess_max lets
# readxl look at up to 4000 rows before it settles on the column types.
# NOTE(review): the file name reads 'metada' (not 'metadata') - presumably
# that is the real name on disk; confirm.
raw_metadata <- read_xlsx(
  path = 'data/metada_work_version.xlsx',
  guess_max = 4000
)

Check columns

# Standardise the column names, then drop rows and columns that are
# completely empty.
raw_metadata <- janitor::remove_empty(
  janitor::clean_names(raw_metadata),
  which = c('rows', 'cols')
)

Check source fields

The fields are: name_orig, format, and language.

# name_orig: sources whose "name" contains a URL, with their record counts
raw_metadata %>%
  filter(grepl('http', name_orig)) %>%
  group_by(name_orig) %>%
  count()

# name_orig: collapse whitespace, strip backslashes and double quotes, then
# count how many distinct countries each source name covers
raw_metadata %>%
  mutate(
    name_orig = name_orig %>%
      str_squish() %>%
      str_remove_all("\\\\") %>%
      str_remove_all("\"")
  ) %>%
  group_by(name_orig) %>%
  summarise(n_countries = n_distinct(country)) %>%
  select(name_orig, n_countries) %>%
  print(n = 10)

# format: distinct values after turning literal 'NA' strings into missing
# values and collapsing whitespace
raw_metadata %>%
  distinct(format = ifelse(format == 'NA', NA, str_squish(format)))

# language
# Literal 'NA' strings become missing values. EVERY '/' or '|' separator is
# rewritten as ' | ' (str_replace() would only touch the first one, leaving
# e.g. 'en/fr/de' half-normalised), and whitespace is collapsed afterwards so
# separators that were already spaced do not end up with doubled blanks.
raw_metadata %>%
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>%
  mutate(language = str_squish(str_replace_all(language, '[/|]', ' | '))) %>%
  distinct(language) %>%
  print(n = 50)

DOUBTS

  • One source has a URL instead of a name; it accounts for 12 records.

The name_orig is: https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/.
The id values are: [1] 446 447 448 449 450 451 452 453 454 455 456 457.

  • There are some formats with ? as value. What should they be?

FIX

# The Grand Est source is recorded with a URL instead of a title, so an
# English name is built from the taxon group of each record.
# French title pattern, e.g.: Liste rouge des Amphibiens du Grand Est
# English pattern used here:  Red list of {group} of Grand Est

raw_metadata %>%
  filter(state_province == 'Grand Est') %>%
  mutate(
    name_orig = ifelse(
      grepl('htt', name_orig) & state_province == 'Grand Est',
      str_glue('Red list of {group} of Grand Est'),
      name_orig
    )
  ) %>%
  select(group, name_orig)

# NOTE(review): none of the pipelines below is assigned back to raw_metadata,
# so they only print the corrected values - confirm the fixes are persisted
# elsewhere.

# language: literal 'NA' -> missing; every '/' or '|' separator (not only the
# first one) becomes ' | ', then whitespace is collapsed
raw_metadata %>%
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>%
  mutate(language = str_squish(str_replace_all(language, '[/|]', ' | ')))

# format: '?' placeholders become missing values
raw_metadata %>%
  mutate(format = ifelse(format == '?', NA, format))

# name_orig: collapse whitespace and strip backslashes and double quotes
raw_metadata %>%
  mutate(name_orig = str_squish(name_orig)) %>%
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>%
  mutate(name_orig = str_remove_all(name_orig, "\""))

Check Location fields

The fields are: continent, country, state_province, gadm_level_1, gadm_level_2, region_custom, region_detail, iso_2, and iso_3.

# check continent: put spaces around '|' separators, replace underscores with
# spaces, title-case, and list the distinct values
raw_metadata %>%
  mutate(
    continent = str_squish(continent),
    continent = str_replace_all(continent, '\\|', ' | '),
    continent = str_squish(continent),
    continent = str_replace_all(continent, '_', ' '),
    continent = str_to_title(continent)
  ) %>%
  distinct(continent)

# check country
# Literal 'NA' -> missing, underscores -> spaces, title case (the acronym
# 'USSR' is left untouched), then the small words and/of/the are lower-cased
# again. The patterns are anchored on word boundaries so that names that
# merely START with 'Of' or 'The' keep their capital letter.
raw_metadata %>%
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>%
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  mutate(country = ifelse(country == 'USSR', country, str_to_title(country))) %>%
  mutate(country = str_replace_all(country, '\\bAnd ', 'and ')) %>%
  mutate(country = str_replace_all(country, '\\bOf\\b', 'of')) %>%
  mutate(country = str_replace_all(country, '\\bThe\\b', 'the')) %>%
  distinct(country)

# check state_province
# Lists the records whose normalised state_province differs from gadm_level_1.
# The and/of/the patterns are anchored on word boundaries so that names that
# merely start with 'Of' or 'The' (e.g. 'Thessaly') keep their capital letter.
# NOTE(review): gadm_level_1 is compared as stored (not normalised), and rows
# where it is missing are dropped because `!=` yields NA - confirm both are
# intended for this audit.
raw_metadata %>%
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>%
  mutate(state_province = str_replace_all(state_province, '\\bAnd ', 'and ')) %>%
  mutate(state_province = str_replace_all(state_province, '\\bOf\\b', 'of')) %>%
  mutate(state_province = str_replace_all(state_province, '\\bThe\\b', 'the')) %>%
  filter(!is.na(state_province)) %>%
  filter(state_province != gadm_level_1) %>%
  distinct(country, state_province, gadm_level_1, iso_2, iso_3) %>%
  print(n = 100)

# check gadm_level_1 and gadm_level_2
# gadm_level_1: literal 'NA' -> missing, title case, then and/of/the are
# lower-cased again. Word boundaries stop the patterns from also hitting the
# start of longer words such as 'Thessaly' or 'Offaly'.
raw_metadata %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>%
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bAnd ', 'and ')) %>%
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bOf\\b', 'of')) %>%
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bThe\\b', 'the')) %>%
  filter(!is.na(gadm_level_1)) %>% distinct(gadm_level_1) %>%
  arrange(gadm_level_1) %>% print(n = 100)

# gadm_level_2: literal 'NA' -> missing, title case, keep only filled-in rows
raw_metadata %>%
  mutate(
    gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2)),
    gadm_level_2 = str_to_title(gadm_level_2)
  ) %>%
  filter(!is.na(gadm_level_2)) %>%
  select(gadm_level_2)

# check region_custom and region_detail: literal 'NA' -> missing, put spaces
# around '|' in region_detail, and list the distinct custom regions together
# with their ISO codes
raw_metadata %>%
  mutate(
    region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom)),
    region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail)),
    region_detail = str_squish(region_detail),
    region_detail = str_replace_all(region_detail, '\\|', ' | '),
    region_detail = str_squish(region_detail)
  ) %>%
  filter(!is.na(region_custom)) %>%
  distinct(region_custom, region_detail, iso_2, iso_3) %>%
  print(n = 100)

# check iso_2 and iso_3: list the records that end up without an ISO-2 code
raw_metadata %>%
  janitor::clean_names() %>%
  janitor::remove_empty(c('rows', 'cols')) %>%
  mutate(
    # 'NA' is only read as "missing" when the country is not Namibia
    # NOTE(review): the comparison with 'Namibia' is case-sensitive and uses
    # the country value as stored - confirm the spelling in the data.
    iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2)),
    iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | ')),
    iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3)),
    iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))
  ) %>%
  select(country, iso_2, iso_3, region_custom, region_detail) %>%
  filter(is.na(iso_2)) # %>% distinct()

DOUBTS

  • What should be done with country == USSR? It should be a custom region, not a country.

  • Kosovo is iso_2 = XK? Yes, it’s a temporary, unofficial country code top-level domain for Kosovo.

  • Are Antarctica and French Southern Territories correct? Yes.

  • Lots of state_province and GADM_level_1 to correct. Is GADM_1 in English?

    ‘Südtirols’ = ‘Bolzano’ (state_name in German?); ‘Centre-Val De Loire’ = ‘Centre’ (something missing?); ‘Corsica’ = ‘Corse’ (something missing?); ‘Flanders’ = ‘Vlaanderen’ (English?); ‘Opole Voivodeship’ = ‘Opole’ (something missing?)

Check Taxon fields

The fields are: kingdom, phylum, subphylum, class, order, and group.

I matched the names against the GBIF backbone taxonomy using a custom function, nameMatcherGBIF().

# gbif name parser
#' Match taxon names against the GBIF species-match API
#'
#' Sends every name in `sp_name_list` to the GBIF `species/match` endpoint
#' (strict = true, verbose = false) and collects the returned classification.
#'
#' @param sp_name_list Character vector of names to look up.
#' @return A tibble with one row per element of `sp_name_list`, in input
#'   order. `sp_name` holds the queried name; `scientificName`, `kingdom`,
#'   `phylum`, `class`, `order`, `family`, `genus`, `specificEpithet`,
#'   `species`, `status` and `rank` are character columns that are `NA` when
#'   GBIF reports no match or when the field is absent from the response.
#'   Empty input yields a zero-row tibble with the same columns.
nameMatcherGBIF <- function(sp_name_list) {

  # api <- 'http://api.gbif.org/v1/parser/name'
  api <- 'http://api.gbif.org/v1/species/match'

  taxon_fields <- c('scientificName', 'kingdom', 'phylum', 'class', 'order',
                    'family', 'genus', 'specificEpithet', 'species',
                    'status', 'rank')

  # Zero-row template: fixes the column set and types even for empty input
  name_parsed <- as_tibble(c(
    list(sp_name = character()),
    setNames(rep(list(character()), length(taxon_fields)), taxon_fields)
  ))

  # One-row tibble for `sp_name`; each taxon field is looked up by its exact
  # name in `values` and falls back to NA when it is not there
  build_row <- function(sp_name, values = list()) {
    cells <- lapply(taxon_fields, function(field) {
      value <- values[[field]]
      if (is.null(value) || length(value) == 0) {
        NA_character_
      } else {
        as.character(value[[1]])
      }
    })
    as_tibble(c(list(sp_name = as.character(sp_name)),
                setNames(cells, taxon_fields)))
  }

  match_one <- function(sp_name) {
    # A missing name cannot be matched; skip the request altogether
    if (is.na(sp_name)) {
      return(build_row(NA_character_))
    }

    # Let httr build and escape the query string: encoding the assembled URL
    # leaves '&' untouched, which truncates names such as
    # 'Proboscidea & Sirenia' at the ampersand
    get_json_call <- GET(url = api,
                         query = list(name = sp_name,
                                      strict = 'true',
                                      verbose = 'false')) %>%
      content(as = 'text', encoding = 'UTF-8') %>%
      fromJSON(flatten = TRUE)

    match_type <- if (is.list(get_json_call)) get_json_call[['matchType']] else NULL

    if (is.null(match_type)) {
      # Unexpected payload (e.g. an API error body): report it instead of
      # failing on a zero-length condition
      warning('GBIF response for "', sp_name, '" has no matchType; ',
              'treating it as unmatched', call. = FALSE)
      return(build_row(sp_name))
    }
    if (match_type == 'NONE') {
      return(build_row(sp_name))
    }
    build_row(sp_name, get_json_call)
  }

  # Collect all rows first and bind once rather than growing a tibble per name
  bind_rows(name_parsed, lapply(sp_name_list, match_one))
}

# Names sent to GBIF: the group of every distinct taxonomy combination,
# trimmed and title-cased, with and/of/the lower-cased again. The patterns
# are anchored on word boundaries so that names that merely start with 'Of'
# or 'The' are not altered.
# NOTE(review): distinct() runs on the full taxonomy combination before the
# group is normalised, so the same name can be queried more than once.
sp_list <- raw_metadata %>%
  distinct(kingdom, phylum, subphylum, class, order, group) %>%
  mutate(group = str_trim(group)) %>%
  mutate(group = str_to_title(group)) %>%
  mutate(group = str_replace_all(group, '\\bAnd ', 'and ')) %>%
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>%
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  pull(group)

sp_list_matched <- nameMatcherGBIF(sp_list) %>% suppressMessages()

# Manual overrides on top of the GBIF result: names containing 'flora' are
# assigned to Plantae, names containing 'fauna' to Animalia, and the match
# for names containing 'tunicata' is discarded.
# NOTE(review): 'flora' is tested first, so a name containing both words
# (e.g. 'Flora Visiting Fauna') is assigned to Plantae - confirm.
# NOTE(review): phylum here comes from the GBIF response; the lower-case
# 'chordata' this line corrects may actually live in raw_metadata - verify.
sp_list_matched <- sp_list_matched %>%
  mutate(scientificName = case_when(grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
                                    grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
                                    grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA,
                                    .default = scientificName)) %>%
  mutate(kingdom = case_when(grepl('flora', sp_name, ignore.case = TRUE) ~ 'Plantae',
                             grepl('fauna', sp_name, ignore.case = TRUE) ~ 'Animalia',
                             grepl('tunicata', sp_name, ignore.case = TRUE) ~ NA,
                             .default = kingdom)) %>%
  mutate(phylum = ifelse(phylum == 'chordata', 'Chordata', phylum))

# Names that still have no scientificName after the overrides
sp_list_unmatched <- sp_list_matched %>%
  filter(is.na(scientificName)) %>% pull(sp_name)

sp_list_matched %>% filter(!is.na(kingdom)) %>% nrow() # matched
[1] 206
length(x = sp_list_unmatched) # names left without a match
[1] 358

When a taxon name (i.e., group) was not matched in GBIF, I kept the taxonomic field values already present in the metadata.

# One row per distinct (trimmed) group with its GBIF classification; groups
# without a GBIF match keep NA in the GBIF columns.
# NOTE(review): sp_name was title-cased when sp_list was built, whereas group
# is only trimmed here, so this join can only succeed for groups whose stored
# spelling already equals the title-cased form - confirm this is intended.
merged_list <- left_join(raw_metadata %>%
                           mutate(group = str_trim(group)) %>%
                           distinct(group) %>%
                           arrange(group),
                         sp_list_matched %>%
                           filter(!is.na(scientificName)) %>%
                           rename(group = sp_name) %>% distinct(),
                         by = 'group') %>%
  arrange(group)

# Taxonomy as recorded in the metadata: the first row found for each group
raw_metadata_taxon_list <- raw_metadata %>%
  mutate(group = str_trim(group)) %>%
  distinct(group, .keep_all = TRUE) %>%
  select(kingdom, phylum, subphylum, class, order, group) %>%
  arrange(group)

# Replace the taxonomy columns of every record: GBIF values where the group
# was matched, the values already in the metadata otherwise. The group label
# is then normalised for display (word boundaries keep names that merely
# start with 'Of'/'The' intact; 'Et Al.' is matched literally) and the
# distinct classifications are rendered as a table.
# NOTE(review): the outer join has no explicit `by`, so it uses every column
# the two tables share - verify that `group` is the only one.
left_join(raw_metadata %>% mutate(group = str_trim(group)) %>%
            select(-c(kingdom, phylum, subphylum, class, order)),
          bind_rows(merged_list %>%
                      filter(!is.na(scientificName)),
                    merged_list %>%
                      filter(is.na(scientificName)) %>%
                      select(group) %>%
                      left_join(., raw_metadata_taxon_list, by = 'group'))) %>%
  mutate(group = str_trim(group)) %>%
  mutate(group = str_to_title(group)) %>%
  mutate(group = str_replace_all(group, '\\bAnd ', 'and ')) %>%
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>%
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  mutate(group = str_replace_all(group, fixed('Et Al.'), 'et al.')) %>%
  distinct(group, kingdom, phylum, class, order, family, rank) %>%
  arrange(kingdom, phylum, class, order) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
group kingdom phylum class order family rank
Earthworms Animalia Anelida Clitellata Opisthopora
Oligochaeta Animalia Anelida Clitellata
Onychophora Animalia Animalia
Branchiobdellida Animalia Annelida Clitellata Branchiobdellida ORDER
Hirudinea Animalia Annelida Clitellata
Worms Animalia Annelida Clitellata
Leech Animalia Annelida Clitellata
Leeches Animalia Annelida Clitellata
Echiurida Animalia Annelida Echiura Echiuroidea
Sipunculids Animalia Annelida Sipuncula
Amblypygi Animalia Arthropoda Arachnida Amblypygi ORDER
Spiders Animalia Arthropoda Arachnida Araneae
Uropodina Animalia Arthropoda Arachnida Mesostigmata
Opiliones Animalia Arthropoda Arachnida Opiliones ORDER
Opilioness Animalia Arthropoda Arachnida Opiliones
Pseudoscorpiones Animalia Arthropoda Arachnida Pseudoscorpiones ORDER
False Scorpions Animalia Arthropoda Arachnida Pseudoscorpions
Scorpions Animalia Arthropoda Arachnida Scorpionida
Arachnida Animalia Arthropoda Arachnida CLASS
Anostraca Animalia Arthropoda Branchiopoda Anostraca ORDER
Branchiopoda Animalia Arthropoda Branchiopoda CLASS
Centipedes Animalia Arthropoda Chilopoda
Chilopoda Animalia Arthropoda Chilopoda CLASS
Collembola Animalia Arthropoda Collembola CLASS
Entomostraca Animalia Arthropoda Copepoda
Copepoda Animalia Arthropoda Copepoda CLASS
Millipedes Animalia Arthropoda Diplopoda
Diplopoda Animalia Arthropoda Diplopoda CLASS
Archaeognatha Animalia Arthropoda Insecta Archaeognatha ORDER
Blattodea Animalia Arthropoda Insecta Blattodea ORDER
Wood Cockroaches Animalia Arthropoda Insecta Blattodea
Cockroaches Animalia Arthropoda Insecta Blattodea
Hydraenidae Animalia Arthropoda Insecta Coleoptera Hydraenidae FAMILY
Coleoptera Animalia Arthropoda Insecta Coleoptera ORDER
Carabidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Saproxylic Beetles Animalia Arthropoda Insecta Coleoptera
Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn and Scarab Beetles Animalia Arthropoda Insecta Coleoptera
Longhorn Beetles Animalia Arthropoda Insecta Coleoptera
Scarabaeidae Animalia Arthropoda Insecta Coleoptera Scarabaeidae FAMILY
Ladybugs Animalia Arthropoda Insecta Coleoptera
Water Beetles Animalia Arthropoda Insecta Coleoptera
Tenebrionidae Animalia Arthropoda Insecta Coleoptera Tenebrionidae FAMILY
Soldier Beetles Animalia Arthropoda Insecta Coleoptera
Leaf Beetles Animalia Arthropoda Insecta Coleoptera
Histeridae Animalia Arthropoda Insecta Coleoptera Histeridae FAMILY
Sphaeritidae Animalia Arthropoda Insecta Coleoptera Sphaeritidae FAMILY
Derodontidoidea Animalia Arthropoda Insecta Coleoptera
Bostrichoidea Animalia Arthropoda Insecta Coleoptera
Staphylinidae Animalia Arthropoda Insecta Coleoptera Staphylinidae FAMILY
Lucanidae Animalia Arthropoda Insecta Coleoptera Lucanidae FAMILY
Geotrupidae Animalia Arthropoda Insecta Coleoptera Geotrupidae FAMILY
Trogidae Animalia Arthropoda Insecta Coleoptera Trogidae FAMILY
Silphidae Animalia Arthropoda Insecta Coleoptera Silphidae FAMILY
Chrysomelidae Animalia Arthropoda Insecta Coleoptera Chrysomelidae FAMILY
Bark Beetles Animalia Arthropoda Insecta Coleoptera
Ground Beetles Animalia Arthropoda Insecta Coleoptera
Curculionidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Powderpost Beetles Animalia Arthropoda Insecta Coleoptera
Bostrichidae Animalia Arthropoda Insecta Coleoptera Bostrichidae FAMILY
Anobiidae Animalia Arthropoda Insecta Coleoptera Anobiidae FAMILY
Ptinidae Animalia Arthropoda Insecta Coleoptera Ptinidae FAMILY
Deadwood Beetle Animalia Arthropoda Insecta Coleoptera
Buprestidae Animalia Arthropoda Insecta Coleoptera Buprestidae FAMILY
Snout Beetles Animalia Arthropoda Insecta Coleoptera
Staphylinoidea Animalia Arthropoda Insecta Coleoptera
Cucujoidea Animalia Arthropoda Insecta Coleoptera
Lamellicornia Animalia Arthropoda Insecta Coleoptera
Seed Beetles Animalia Arthropoda Insecta Coleoptera
Weevils Animalia Arthropoda Insecta Coleoptera
Anthribidae Animalia Arthropoda Insecta Coleoptera Anthribidae FAMILY
Platypodidae Animalia Arthropoda Insecta Coleoptera Curculionidae FAMILY
Ground Beetle Animalia Arthropoda Insecta Coleoptera
Tiger Beetles Animalia Arthropoda Insecta Coleoptera
Megalopodidae Animalia Arthropoda Insecta Coleoptera Megalopodidae FAMILY
Scarabaeoidea Animalia Arthropoda Insecta Coleoptera
Cerambycidae Animalia Arthropoda Insecta Coleoptera Cerambycidae FAMILY
Curculionoidea Animalia Arthropoda Insecta Coleoptera
Cleroidea Animalia Arthropoda Insecta Coleoptera
Elateridae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Lymexyloidea Animalia Arthropoda Insecta Coleoptera
Cicindelidae Animalia Arthropoda Insecta Coleoptera Carabidae FAMILY
Lucanoidea Animalia Arthropoda Insecta Coleoptera
Hydrophilidae Animalia Arthropoda Insecta Coleoptera Hydrophilidae FAMILY
Platypsyllinae Animalia Arthropoda Insecta Coleoptera
Cholevinae Animalia Arthropoda Insecta Coleoptera
Malachiidae Animalia Arthropoda Insecta Coleoptera Malachiidae FAMILY
Melyridae Animalia Arthropoda Insecta Coleoptera Melyridae FAMILY
Phloeophilidae Animalia Arthropoda Insecta Coleoptera Phloiophilidae FAMILY
Cleridae Animalia Arthropoda Insecta Coleoptera Cleridae FAMILY
Cerophytidae Animalia Arthropoda Insecta Coleoptera Cerophytidae FAMILY
Eucnemidae Animalia Arthropoda Insecta Coleoptera Eucnemidae FAMILY
Cryptophagidae Animalia Arthropoda Insecta Coleoptera Cryptophagidae FAMILY
Latridiidae Animalia Arthropoda Insecta Coleoptera Latridiidae FAMILY
Mycetophagidae Animalia Arthropoda Insecta Coleoptera Mycetophagidae FAMILY
Zopheridae Animalia Arthropoda Insecta Coleoptera Zopheridae FAMILY
Monotomidae Animalia Arthropoda Insecta Coleoptera Monotomidae FAMILY
Phalacridae Animalia Arthropoda Insecta Coleoptera Phalacridae FAMILY
Pyrochroide Animalia Arthropoda Insecta Coleoptera
Meloidae Animalia Arthropoda Insecta Coleoptera
Orsodacnidae Animalia Arthropoda Insecta Coleoptera Orsodacnidae FAMILY
Donaciinae Animalia Arthropoda Insecta Coleoptera
Leptinidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Lissomidae Animalia Arthropoda Insecta Coleoptera Elateridae FAMILY
Derodontoidae Animalia Arthropoda Insecta Coleoptera
Leiodidae Animalia Arthropoda Insecta Coleoptera Leiodidae FAMILY
Oedemeridae Animalia Arthropoda Insecta Coleoptera Oedemeridae FAMILY
Melandryidae Animalia Arthropoda Insecta Coleoptera Melandryidae FAMILY
Dung Beetles Animalia Arthropoda Insecta Coleoptera
Earwigs Animalia Arthropoda Insecta Dermaptera
Heteroptera Animalia Arthropoda Insecta Diptera Sphaeroceridae GENUS
Acalyptratae Animalia Arthropoda Insecta Diptera
Calypterate Animalia Arthropoda Insecta Diptera
Larger Brachycera Animalia Arthropoda Insecta Diptera
Dolichopodid Animalia Arthropoda Insecta Diptera
Lonchopteridae Animalia Arthropoda Insecta Diptera Lonchopteridae FAMILY
Platypezidae Animalia Arthropoda Insecta Diptera Platypezidae FAMILY
Opetiidae Animalia Arthropoda Insecta Diptera Opetiidae FAMILY
Hoverflies Animalia Arthropoda Insecta Diptera
Chaoboridae Animalia Arthropoda Insecta Diptera Chaoboridae FAMILY
Thaumaleidae Animalia Arthropoda Insecta Diptera Thaumaleidae FAMILY
Ceratopogonidae Animalia Arthropoda Insecta Diptera Ceratopogonidae FAMILY
Flies Animalia Arthropoda Insecta Diptera
Asilidae Animalia Arthropoda Insecta Diptera Asilidae FAMILY
Psychodidae Animalia Arthropoda Insecta Diptera Psychodidae FAMILY
Dixidae Animalia Arthropoda Insecta Diptera Dixidae FAMILY
Soldier Flies Animalia Arthropoda Insecta Diptera
Horse-Flies Animalia Arthropoda Insecta Diptera
Bee Flies Animalia Arthropoda Insecta Diptera
Empididae Animalia Arthropoda Insecta Diptera Empididae FAMILY
Conopidae Animalia Arthropoda Insecta Diptera Conopidae FAMILY
Chironomidae Animalia Arthropoda Insecta Diptera Chironomidae FAMILY
Dolichopodidae Animalia Arthropoda Insecta Diptera Dolichopodidae FAMILY
Black Flies Animalia Arthropoda Insecta Diptera
Long-Legged Flies Animalia Arthropoda Insecta Diptera
Micropezidae Animalia Arthropoda Insecta Diptera Micropezidae FAMILY
Grass Flies Animalia Arthropoda Insecta Diptera
Tachinidae Animalia Arthropoda Insecta Diptera Tachinidae FAMILY
Aquatic Empididae Animalia Arthropoda Insecta Diptera
Pediciidae Animalia Arthropoda Insecta Diptera Pediciidae FAMILY
Limoniidae Animalia Arthropoda Insecta Diptera Limoniidae FAMILY
Diptera Animalia Arthropoda Insecta Diptera
Parasitic Diptera Animalia Arthropoda Insecta Diptera
Mayflies Animalia Arthropoda Insecta Ephemeroptera
Auchenorrhyncha Animalia Arthropoda Insecta Hemiptera
Cicadas Animalia Arthropoda Insecta Hemiptera
Shieldbugs Animalia Arthropoda Insecta Hemiptera
Clavicornia Animalia Arthropoda Insecta Hemiptera Aradidae GENUS
Big-Eyed Bugs Animalia Arthropoda Insecta Hemiptera
Nepomorpha Animalia Arthropoda Insecta Hemiptera
Fulgoromorpha Animalia Arthropoda Insecta Hemiptera
Cicadomorpha Animalia Arthropoda Insecta Hemiptera Palaeontinidae GENUS
Scale Insect Animalia Arthropoda Insecta Hemiptera
Hemiptera Animalia Arthropoda Insecta Hemiptera ORDER
Ants Animalia Arthropoda Insecta Hymenoptera
Bombus Spp. Animalia Arthropoda Insecta Hymenoptera
Hymenoptera Animalia Arthropoda Insecta Hymenoptera ORDER
Bees Animalia Arthropoda Insecta Hymenoptera
Diversicornia Animalia Arthropoda Insecta Hymenoptera Encyrtidae GENUS
Sawflies Animalia Arthropoda Insecta Hymenoptera
Spheciformes Animalia Arthropoda Insecta Hymenoptera
Pompilidae Animalia Arthropoda Insecta Hymenoptera Pompilidae FAMILY
Chrysididae Animalia Arthropoda Insecta Hymenoptera Chrysididae FAMILY
Scolioidea Animalia Arthropoda Insecta Hymenoptera
Cuckoo Wasp Animalia Arthropoda Insecta Hymenoptera
Wasps Animalia Arthropoda Insecta Hymenoptera
Sphecidae Animalia Arthropoda Insecta Hymenoptera Sphecidae FAMILY
Wild Bees Animalia Arthropoda Insecta Hymenoptera
Scoliidae Animalia Arthropoda Insecta Hymenoptera Scoliidae FAMILY
Crabronidae et al. Animalia Arthropoda Insecta Hymenoptera
Chrysididae et al. Animalia Arthropoda Insecta Hymenoptera
Symphyta Animalia Arthropoda Insecta Hymenoptera
Stinging Wasps Animalia Arthropoda Insecta Hymenoptera
Mutillidae Animalia Arthropoda Insecta Hymenoptera Mutillidae FAMILY
Sapygidae Animalia Arthropoda Insecta Hymenoptera Sapygidae FAMILY
Tiphiidae Animalia Arthropoda Insecta Hymenoptera Tiphiidae FAMILY
Cimbicidae Animalia Arthropoda Insecta Hymenoptera Cimbicidae FAMILY
Siricidae Animalia Arthropoda Insecta Hymenoptera Siricidae FAMILY
Xiphydriidae Animalia Arthropoda Insecta Hymenoptera Xiphydriidae FAMILY
Ampulicidae Animalia Arthropoda Insecta Hymenoptera Ampulicidae FAMILY
Crabronidae Animalia Arthropoda Insecta Hymenoptera Crabronidae FAMILY
Apoidea Animalia Arthropoda Insecta Hymenoptera
Lepidoptera Animalia Arthropoda Insecta Lepidoptera ORDER
Moths Animalia Arthropoda Insecta Lepidoptera
Butterflies Animalia Arthropoda Insecta Lepidoptera
Papilionoidea Animalia Arthropoda Insecta Lepidoptera
Hesperioidea Animalia Arthropoda Insecta Lepidoptera
Noctuidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Night Butterflies Animalia Arthropoda Insecta Lepidoptera
Owlet Moths Animalia Arthropoda Insecta Lepidoptera
Lymantriinae Animalia Arthropoda Insecta Lepidoptera
Short-Cloaked Moth Animalia Arthropoda Insecta Lepidoptera
Geometer Moths Animalia Arthropoda Insecta Lepidoptera
Bombyces Animalia Arthropoda Insecta Lepidoptera
Sphinges S.l. Animalia Arthropoda Insecta Lepidoptera
Pyralidae Animalia Arthropoda Insecta Lepidoptera Pyralidae FAMILY
Sphinges Animalia Arthropoda Insecta Lepidoptera
Geometridae Animalia Arthropoda Insecta Lepidoptera Geometridae FAMILY
Makrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Macrolepidoptera Animalia Arthropoda Insecta Lepidoptera
Microlepidoptera Animalia Arthropoda Insecta Lepidoptera
Zygaenidae Animalia Arthropoda Insecta Lepidoptera Zygaenidae FAMILY
Sphingidae Animalia Arthropoda Insecta Lepidoptera Sphingidae FAMILY
Sesiidae Animalia Arthropoda Insecta Lepidoptera Sesiidae FAMILY
Psychidae Animalia Arthropoda Insecta Lepidoptera Psychidae FAMILY
Pterophoridae Animalia Arthropoda Insecta Lepidoptera Pterophoridae FAMILY
Alucitidae Animalia Arthropoda Insecta Lepidoptera Alucitidae FAMILY
Crambidae Animalia Arthropoda Insecta Lepidoptera Crambidae FAMILY
Torticidae Animalia Arthropoda Insecta Lepidoptera
Choreutidae Animalia Arthropoda Insecta Lepidoptera Choreutidae FAMILY
Hawk Moths Animalia Arthropoda Insecta Lepidoptera
Bombycidae Animalia Arthropoda Insecta Lepidoptera Bombycidae FAMILY
Pantheidae Animalia Arthropoda Insecta Lepidoptera Noctuidae FAMILY
Nolidae Animalia Arthropoda Insecta Lepidoptera Nolidae FAMILY
Sessidae Animalia Arthropoda Insecta Lepidoptera
Erebidae Animalia Arthropoda Insecta Lepidoptera Erebidae FAMILY
Mantodea Animalia Arthropoda Insecta Mantodea ORDER
Mecoptera Animalia Arthropoda Insecta Mecoptera ORDER
Scorpionflies Animalia Arthropoda Insecta Mecoptera
Megaloptera Animalia Arthropoda Insecta Megaloptera
Neuroptera Animalia Arthropoda Insecta Neuroptera ORDER
Owlflies Animalia Arthropoda Insecta Neuroptera
Net-Winged Insects Animalia Arthropoda Insecta Neuroptera
Odonata Animalia Arthropoda Insecta Odonata ORDER
Orthoptera Animalia Arthropoda Insecta Orthoptera ORDER
Grasshoppers Animalia Arthropoda Insecta Orthoptera
Ensifera Animalia Arthropoda Insecta Orthoptera
Caelifera Animalia Arthropoda Insecta Orthoptera
Crickets Animalia Arthropoda Insecta Orthoptera
Katydids Animalia Arthropoda Insecta Orthoptera
Stick Insects Animalia Arthropoda Insecta Phasmatodea
Stoneflies Animalia Arthropoda Insecta Plecoptera
Snakeflies Animalia Arthropoda Insecta Raphidioptera
Thrips Animalia Arthropoda Insecta Thysanoptera Thripidae GENUS
Trichoptera Animalia Arthropoda Insecta Trichoptera ORDER
Caddisflies Animalia Arthropoda Insecta Trichoptera
Plecoptera Animalia Arthropoda Insecta CLASS
Insects Animalia Arthropoda Insecta
Aquatic and Semi-Aquatic Bugs Animalia Arthropoda Insecta
Insecta Animalia Arthropoda Insecta CLASS
Water Bugs Animalia Arthropoda Insecta
Woodlice Animalia Arthropoda Isopoda
Amphipoda Animalia Arthropoda Malacostraca Amphipoda ORDER
Niphargidae Animalia Arthropoda Malacostraca Amphipoda Niphargidae FAMILY
Cumacea Animalia Arthropoda Malacostraca Cumacea ORDER
Decapoda Animalia Arthropoda Malacostraca Decapoda ORDER
Crayfishes Animalia Arthropoda Malacostraca Decapoda
Astacoidea Animalia Arthropoda Malacostraca Decapoda
Freshwater Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Decapod Crustaceans Animalia Arthropoda Malacostraca Decapoda
Marine Isopoda Animalia Arthropoda Malacostraca Isopoda
Malacostraca Animalia Arthropoda Malacostraca CLASS
Mysidacea Animalia Arthropoda Malacostraca CLASS
Barnacles Animalia Arthropoda Thecostraca
Freshwater Crabs Animalia Arthropoda Decapoda
Horseshoe Crabs Animalia Arthropoda Xiphosura
Crustaceans Animalia Arthropoda
Arthropods Animalia Arthropoda
Freshwater Crustaceans Animalia Arthropoda
Crayfish Animalia Arthropoda
Sea Spiders Animalia Arthropoda
Marine Crustaceans Animalia Arthropoda
Miscellaneous Arthropods Animalia Arthropoda
Myriapods Animalia Arthropoda
Brachiopods Animalia Brachiopoda
Marine Bryozoans Animalia Bryoza
Bryozoa Animalia Bryozoa PHYLUM
Bony Fishes Animalia Chordata Actinopterygii
Frogs Animalia Chordata Amphibia Anura
Amphibians Animalia Chordata Amphibia
Sea Squirts Animalia Chordata Ascidiacea
Anatidae Animalia Chordata Aves Anseriformes Anatidae FAMILY
Galliformes Animalia Chordata Aves Galliformes ORDER
Birds Animalia Chordata Aves
Breeding Birds Animalia Chordata Aves
Wintering Birds Animalia Chordata Aves
Transient Birds Animalia Chordata Aves
Birds Terre Adelie Animalia Chordata Aves
Birds Terres Australes Animalia Chordata Aves
Birds Scattered Islands Animalia Chordata Aves
Migratory Birds Animalia Chordata Aves
Metropolitan Birds Animalia Chordata Aves
Endemic Brids Animalia Chordata Aves
Breeding Birds of Prey Animalia Chordata Aves
Endangered Birds Animalia Chordata Aves
Birds Ecuador Animalia Chordata Aves
Birds Galapagos Animalia Chordata Aves
Birds of Prey Animalia Chordata Aves
Breeding Raptors Animalia Chordata Aves
Freshwater Lamprey Animalia Chordata Cephalaspidomorphi
Sharks Animalia Chordata Chondrichthyes
Chondrichthyes Animalia Chordata Chondrichthyes
Lamprey Animalia Chordata Hyperoartia
Cetaceans Animalia Chordata Mammalia Artiodactyla
Bats Animalia Chordata Mammalia Chiroptera
Perissodactyla Animalia Chordata Mammalia Perissodactyla ORDER
Primates Animalia Chordata Mammalia Primates ORDER
Lemurs Animalia Chordata Mammalia Primates
Rodents Animalia Chordata Mammalia Rodentia
Mammals Animalia Chordata Mammalia
Terrestrial Mammals Animalia Chordata Mammalia
Marine Mammals Animalia Chordata Mammalia
Mammals Scattered Islands Animalia Chordata Mammalia
Terrestial Mammals Animalia Chordata Mammalia
Aquatic Mammals Animalia Chordata Mammalia
Metropolitan Mammals Animalia Chordata Mammalia
Endemic Mammals Animalia Chordata Mammalia
Large Mammals Animalia Chordata Mammalia
Insectivores Animalia Chordata Mammalia
Carnivores Animalia Chordata Mammalia
Marine Cetartiodactyla Animalia Chordata Mammalia
Terrestrial Cetartiodactyla Animalia Chordata Mammalia
Proboscidea & Sirenia Animalia Chordata Mammalia
Endangered Mammals Animalia Chordata Mammalia
Ungulates Animalia Chordata Mammalia
Land Mammals Animalia Chordata Mammalia
Caimans Animalia Chordata Reptilia Crocodilia
Chameleons Animalia Chordata Reptilia Squamata
Lizards and Worm-Lizards Animalia Chordata Reptilia Squamata
Snakes Animalia Chordata Reptilia Squamata
Marine Turtles Animalia Chordata Reptilia Testudines
Turtles Animalia Chordata Reptilia Testudines
Sea Turtles Animalia Chordata Reptilia Testudines
Reptiles Animalia Chordata Reptilia
Terrestrial Reptiles Animalia Chordata Reptilia
Endemic Lizards Animalia Chordata Reptilia
Endemic Reptiles Animalia Chordata Reptilia
Fishes Animalia Chordata
Lampreys Animalia Chordata
Freshwater Fishes Animalia Chordata
Marine Fishes Animalia Chordata
Tunicata Animalia Chordata
Lancelets Animalia Chordata
Reef Fishes Animalia Chordata
Terrestrial Vertebrates Animalia Chordata
Freshwater and Migratory Fishes Animalia Chordata
Cyclostomata Animalia Chordata
Endangered Vertebrates Animalia Chordata
Endemic Freshwater Fishes Animalia Chordata
Linefishes Animalia Chordata
Brackish and Freshwater Fishes Animalia Chordata
Corals Animalia Cnidaria
Cnidaria Animalia Cnidaria PHYLUM
Reef Corals Animalia Cnidaria
Marine Cnidaria Animalia Cnidaria
Echinoderms Animalia Echinodermata
Acorn Worms Animalia Hemichordata Enteropneusta
Bivalvia Animalia Mollusca Bivalvia CLASS
Marine Bivalves Animalia Mollusca Bivalvia
Mussels Animalia Mollusca Bivalvia
Musslels Animalia Mollusca Bivalvia
Cephalopods Animalia Mollusca Cephalopoda
Gastropoda Animalia Mollusca Gastropoda CLASS
Snails Animalia Mollusca Gastropoda
Marine Snails Animalia Mollusca Gastropoda
Terrestrial Gastropods Animalia Mollusca Gastropoda
Freshwater Gastropods Animalia Mollusca Gastropoda
Mollusca Animalia Mollusca PHYLUM
Molluscs Animalia Mollusca
Terrestrial Molluscs Animalia Mollusca
Non-Marine Molluscs Animalia Mollusca
Inland Molluscs Animalia Mollusca
Species-Poor Groups of Marine Molluscs Animalia Mollusca
Freswater Mollusc Animalia Mollusca
Extramarine Molluscs Animalia Mollusca
Nematoda Animalia Nematoda PHYLUM
Ribbon Worms Animalia Nemertea
Flatworms Animalia Platyhelminthes Turbellaria
Porifera Animalia Porifera PHYLUM
Sea Sponges Animalia Porifera
Vertebrates Animalia chordata
Fauna Animalia
Cave Fauna Animalia
Polychaeta Animalia KINGDOM
Invertebrates Animalia
Fauna_en_higher Animalia
Fauna_nt_lc_dd Animalia
Fauna_en_vu Animalia
Terrestrial Invertebrates Animalia
Aquatic Invertebrates Animalia
Fauna_flagship Species Animalia
Endemic Fauna Animalia
Protected Animals Animalia
Endangered Fauna Animalia
Selected Species Animalia
Marine Species Animalia
Endangered Species Animalia
Marine Invertebrates Animalia
Freshwater Plants Animalia
Flora Visiting Fauna Animalia
Other Invertebrates Animalia
Other Marine Invertebrates Animalia
Marine Fauna Animalia
Endemic Animals Animalia
Vaucheriaceae Chromista Ochrophyta Xanthophyceae Vaucheriales Vaucheriaceae FAMILY
Ascomycota Fungi Ascomycota PHYLUM
Agaricales Fungi Basidiomycota Agaricomycetes Agaricales ORDER
Boletaceae Fungi Basidiomycota Agaricomycetes Boletales Boletaceae FAMILY
Boletales Fungi Basidiomycota Agaricomycetes Boletales ORDER
Russulales Fungi Basidiomycota Agaricomycetes Russulales ORDER
Ustilaginales Fungi Basidiomycota Ustilaginomycetes Ustilaginales ORDER
Basidiomycota Fungi Basidiomycota PHYLUM
Fungi Fungi KINGDOM
Macromycetes Fungi
Mushrooms Fungi
Macrofungi Fungi
Phytoparasitic Small Fungi Fungi
Large Mushrooms Fungi
Lichenicolous Fungus Fungi
Ascomycetes Fungi
Aphyllophorales Fungi
Phytoparasitic Microfungi Fungi
Characeae Plantae Charophyta Charophyceae Charales Characeae FAMILY
Charophyceae Plantae Charophyta Charophyceae CLASS
Desmidiales Plantae Charophyta Conjugatophyceae Desmidiales ORDER
Zygnematophyceae Plantae Charophyta Zygnematophyceae CLASS
Freshwater Diatoms Plantae Gyrista Bacillariophyceae
Marchantiophyta Plantae Marchantiophyta PHYLUM
Apiaceae Plantae Tracheophyta Magnoliopsida Apiales Apiaceae FAMILY
Cactaceae Plantae Tracheophyta Magnoliopsida Caryophyllales Cactaceae FAMILY
Magnoliaceae Plantae Tracheophyta Magnoliopsida Magnoliales Magnoliaceae FAMILY
Anisoptera Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae GENUS
Dipterocarpaceae Plantae Tracheophyta Magnoliopsida Malvales Dipterocarpaceae FAMILY
Magnoliophyta Plantae Tracheophyta PHYLUM
Charophytes Plantae Charophyceae Charales
Lycopods Plantae Lycopodiopsida Lycopodiales
Tree Ferns Plantae Polypodiopsida
Sphagnum Mosses Plantae Sphagnopsida
Orchids Plantae Asparagales
Wild Cinnamon Plantae Laurales
Bromeliads Plantae Poales
Flora Plantae
Bryophytes Plantae
Ferns Plantae
Vascular Plants Plantae
Lichens Plantae
Hydrophytes Plantae
Marine Flora Plantae
Hornworts Plantae
Liverworts Plantae
Mosses Plantae
Flora Saint Paul and Amsterdam Plantae
Flora Scattered Islands Plantae
Flora Kerguelen Plantae
Endemic Flora Plantae
Trees Plantae
Shrubs Plantae
Algae Plantae
Arctic Vascular Plants Plantae
Marine Macroalgae Plantae
Freshwater Red Algae Plantae
Freshwater Brown Algae Plantae
Flowering Plant Plantae
Red Algae Plantae
Brown Algae Plantae
Hepaticophyta Plantae
Broad-Leaved Mosses Plantae
Lichen Communities Plantae
Flora of Cerrado Biom Plantae
Endemic Plants Plantae
Flora On the Red List Plantae
Protected Plants Plantae
Endangered Plants Plantae
Near-Endemic Flora Plantae
Perennial Shrubs Plantae
Flora_2 Plantae
Endemic and Range-Restricted Vascular Plantss Plantae
Indigenous Plants Plantae
Selected Species In Marshlands Plantae
Conifers Plantae
Peninsular Planrs Plantae
Lycophytes Plantae
Higher Plants Plantae
Cloud Forest Trees Plantae
Spermatophytes Plantae
Palms Plantae
Wild Crop Relatives Plantae
Aquatic Plants Plantae
Medicinal Plants Plantae
Dry Forest Trees Plantae
Monocotyledons Plantae
Freshwater Flora Plantae
Flora List Plantae
Endemic Trees Plantae
Myxomycetes Protozoa Mycetozoa Myxomycetes CLASS
Protozoa Protozoa KINGDOM
Zygoptera Protozoa GENUS
  • The value Tracheophyta is for vascular plants. Some groups with value Flora have this but some others don’t. Is it correct?
  • There are some groups with value Fauna that have Chordata and Vertebrata as phylum and subphylum, but some others don’t. Is it correct?
  • The group Onychophora should be phylum Onychophora.
  • There’s a typo in the group Opilioness; it should be Opiliones.
  • Correct the value to order Pseudoscorpiones for the group False Scorpions.
  • The group Entomostraca should not have Copepoda as class.
  • Crabs should have class Malacostraca.
  • Sharks should not have Chondrichthyes as class.
  • Groups Cetartiodactyla and Marine Cetartiodactyla should be order Artiodactyla.
  • Carnivores should be order Carnivora.
  • Group Lizards should be order Squamata.
  • Group Lampreys should be class Petromyzonti and order Petromyzontiformes.
  • There’s a typo in the group Musslels; it should be Mussels.
  • There’s a typo in the phylum chordata; it should be capitalised (Chordata).
  • The groups Orchids and Bromeliads, are missing phylum, class and family. They should be Tracheophyta and Liliopsida. Also family Orchidaceae and Bromeliaceae.
  • The group Wild Cinnamon is missing phylum, class and order. They should be Tracheophyta, Magnoliopsida and Canellales, and the group should also have family Canellaceae.
  • The groups Vascular plants, Trees, Shrubs, Cloud Forest Trees, Endemic Trees, Perennial Shrubs, Arctic Vascular Plants, Spermatophytes and Angiosperms, should have Tracheophyta as phylum.
  • The group Flowering Plant should have
  • The group Ferns should have Tracheophyta and Polypodiopsida, as phylum and class.
  • The group Conifers, should have class Pinopsida.
  • The group Palms, should have Tracheophyta, Liliopsida, Arecales, and Arecaceae, as phylum, class, order and family.
  • The group Monocotyledons should have Tracheophyta and Liliopsida, as phylum and class.
  • There’s a typo in Endemic and Range-Restricted Vascular Plantss. This should have Tracheophyta as phylum.

Check Event fields

The field is: year.

# List the sources whose `year` is missing or cannot be read as a number,
# keeping one row per source name so the offending values can be reviewed.
raw_metadata %>%
  janitor::clean_names() %>%
  janitor::remove_empty(c('rows', 'cols')) %>%
  select(year, name_orig) %>%
  mutate(
    # the literal text 'NA' stands for a missing year
    year = ifelse(year == 'NA', NA, year),
    # anything that is not a plain number becomes NA here
    year_new = as.numeric(year)
  ) %>%
  filter(is.na(year_new)) %>%
  distinct(name_orig, .keep_all = TRUE)

# Preview the cleaned `year` column, one row per source name.
#   1. the literal text 'NA' becomes a real missing value;
#   2. the uncertain value '2024?' is accepted as 2024;
#   3. the column is converted to numeric;
#   4. sources listed below that still have no year are assigned 2024.
raw_metadata %>%
  janitor::clean_names() %>%
  janitor::remove_empty(c('rows', 'cols')) %>%
  mutate(year = ifelse(year == 'NA', NA, year)) %>%
  mutate(year = ifelse(year == '2024?', 2024, year)) %>%
  mutate(year = as.numeric(year)) %>%
  # one lookup instead of ten identical branches; `%in%` never returns NA,
  # so rows with a missing name fall through to `.default`
  mutate(year = case_when(
    is.na(year) & name_orig %in% c(
      'Crveni popis hrvatskih koralja',
      'A Red List of Benin’s sharks',
      'Crveni popis lišajeva Hrvatske',
      'Červené seznamy',
      'Coleoptera (Beetle) – Invertebrate Ireland Online',
      'Tricoptera (Caddisfly) – InvertebrateIreland Online',
      'Les mammifères de la Côte d’Ivoire',
      'Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation',
      'Красная книга Азербайджанской Республики',
      'Красная книга Узбекистана'
    ) ~ 2024,
    .default = year)) %>%
  # filter(is.na(year)) %>%
  select(year, name_orig) %>%
  distinct(name_orig, .keep_all = TRUE)

DOUBTS

  • There are 16 records with year = 2024?. Is it correct to change them to 2024? They belong to the following sources:
https://www.odonat-grandest.fr/listes-rouges-grand-est-etat-avancement/
A Red List of Benin’s sharks                                           
The Red List of Mammals of South Africa, Swaziland and Lesotho 2024  
  • There are 9 records without year. What should we do with them? They belong to the following sources:
1 Crveni popis hrvatskih koralja                                                        
2 Crveni popis lišajeva Hrvatske                                                        
3 Červené seznamy                                                                       
4 Coleoptera (Beetle) – Invertebrate Ireland Online                                     
5 Tricoptera (Caddisfly) – InvertebrateIreland Online                                   
6 Les mammifères de la Côte d’Ivoire                                                    
7 Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation
8 Красная книга Азербайджанской Республики                                              
9 Красная книга Узбекистана  

Check if URLs are working

# URL incorrect: distinct `url_clean` values that do not contain 'http'.
# grepl() returns FALSE for NA, so missing URLs are listed as well.
raw_metadata %>%
  distinct(url_clean) %>%
  filter(!grepl('http', url_clean))
# A tibble: 3 × 1
  url_clean                                                                     
  <chr>                                                                         
1 "Nicolau, J. i Dalmau, J., 2008. Llista Vermella\r\ndels Vertebrats d’Andorra…
2 "ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna"                 
3  <NA>                                                                         
# URL error
# raw_metadata %>% filter(grepl('http', url_clean)) %>% 
#   distinct(url_clean) %>% 
#   mutate(check_URL = ifelse(map(URLencode(url_clean), http_error), 'not found', 'OK')) %>% 
#   filter(check_URL == 'not found') 

DOUBTS

  • There’s one URL that resolves but has ‘http’ missing and one URL that is incorrect. Should this be NA?
ima.sc.gov.br/index.php/biodiversidade/biodiversidade/fauna
Nicolau, J. i Dalmau, J., 2008. Llista Vermella\r\ndels Vertebrats d’Andorra. BIOCOM (Biologia i\r\nComunicació) SL i Departament de Patrimoni\r\nNatural del Govern d’Andorra. Informe inèdit

Run code and keep relevant fields

# Build the cleaned `metadata` table from `raw_metadata`.
# Steps, in order: attach the taxonomy (from `merged_list` and
# `raw_metadata_taxon_list`), tidy the group names, then clean the source,
# format, language, location and event fields, and finally keep and rename
# the columns used downstream.
metadata <- 
  # check taxon: drop the original taxonomy columns and join the checked ones
  left_join(raw_metadata %>% mutate(group = str_trim(group)) %>%
              select(-c(kingdom,phylum,subphylum,class,order)),
            bind_rows(merged_list %>% filter(!is.na(scientificName)),
                      merged_list %>% filter(is.na(scientificName)) %>%
                        select(group) %>%
                        left_join(. , raw_metadata_taxon_list))) %>% 
  mutate(group = str_trim(group)) %>% 
  mutate(group = str_to_title(group)) %>% 
  # put the small words back to lower case after title-casing; 'Of' and 'The'
  # are anchored on word boundaries so that longer words starting with those
  # letters are left intact
  mutate(group = str_replace_all(group, 'And ', 'and ')) %>% 
  mutate(group = str_replace_all(group, '\\bOf\\b', 'of')) %>% 
  mutate(group = str_replace_all(group, '\\bThe\\b', 'the')) %>%
  # more taxonomic corrections
  #
  #
  #
  #
  # check columns
  janitor::clean_names() %>% 
  janitor::remove_empty(c('rows', 'cols')) %>% 
  # check source: collapse whitespace, strip backslashes and double quotes
  mutate(name_orig = str_squish(name_orig)) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\\\\")) %>% 
  mutate(name_orig = str_remove_all(name_orig, "\"")) %>% 
  # Grand Est sources carry a URL instead of a name: build a name from the group
  mutate(name_orig = ifelse(grepl('htt', name_orig) & 
                               state_province == 'Grand Est',
                             str_glue('Red list of {group} of Grand Est'), name_orig)) %>% 
  # missing name of source
  
  # check format: the text 'NA' and '?' both mean unknown
  mutate(format = ifelse(format == 'NA', NA, str_squish(format))) %>% 
  mutate(format = ifelse(format == '?', NA, format)) %>% 
  # check language: normalise every '/' or '|' separator to ' | ', as done
  # for the other pipe-separated fields below
  mutate(language = ifelse(language == 'NA', NA, str_squish(language))) %>% 
  mutate(language = str_squish(str_replace_all(language, "/|\\|", ' | '))) %>% 
  # check location
  mutate(continent = str_squish(str_replace_all(str_squish(continent), '\\|', ' | '))) %>%
  mutate(continent = str_replace_all(continent, '_', ' ')) %>%
  mutate(continent = str_to_title(continent)) %>% 
  mutate(country = ifelse(country == 'NA', NA, str_squish(country))) %>% 
  mutate(country = str_replace_all(country, '_', ' ')) %>%
  # keep the acronym 'USSR' in upper case
  mutate(country = ifelse(country == 'USSR', country, str_to_title(country))) %>%
  mutate(country = str_replace_all(country, 'And ', 'and ')) %>% 
  mutate(country = str_replace_all(country, '\\bOf\\b', 'of')) %>% 
  mutate(country = str_replace_all(country, '\\bThe\\b', 'the')) %>%
  mutate(state_province = ifelse(state_province == 'NA', NA, str_squish(state_province))) %>%
  mutate(state_province = str_to_title(state_province)) %>% 
  # 'NA' is the real ISO-2 code of Namibia, so it is kept for that country
  mutate(iso_2 = ifelse(iso_2 == 'NA' & country != 'Namibia', NA, str_squish(iso_2))) %>% 
  mutate(iso_3 = ifelse(iso_3 == 'NA', NA, str_squish(iso_3))) %>%  
  mutate(iso_2 = str_squish(str_replace_all(str_squish(iso_2), '\\|', ' | '))) %>%
  mutate(iso_3 = str_squish(str_replace_all(str_squish(iso_3), '\\|', ' | '))) %>%
  mutate(gadm_level_1 = ifelse(gadm_level_1 == 'NA', NA, str_squish(gadm_level_1))) %>%
  mutate(gadm_level_1 = str_to_title(gadm_level_1)) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, 'And ', 'and ')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bOf\\b', 'of')) %>% 
  mutate(gadm_level_1 = str_replace_all(gadm_level_1, '\\bThe\\b', 'the')) %>% 
  mutate(gadm_level_2 = ifelse(gadm_level_2 == 'NA', NA, str_squish(gadm_level_2))) %>%
  mutate(gadm_level_2 = str_to_title(gadm_level_2)) %>% 
  mutate(region_custom = ifelse(region_custom == 'NA', NA, str_squish(region_custom))) %>% 
  mutate(region_detail = ifelse(region_detail == 'NA', NA, str_squish(region_detail))) %>% 
  mutate(region_detail = str_squish(str_replace_all(str_squish(region_detail), '\\|', ' | '))) %>%
  # check event: 'NA' text -> missing, '2024?' -> 2024, then numeric
  mutate(year = ifelse(year == 'NA', NA, year)) %>% 
  mutate(year = ifelse(year == '2024?', 2024, year)) %>% 
  mutate(year = as.numeric(year)) %>% 
  # sources listed here that still have no year are assigned 2024; `%in%`
  # never returns NA, so rows with a missing name keep their (missing) year
  mutate(year = case_when(
    is.na(year) & name_orig %in% c(
      'Crveni popis hrvatskih koralja',
      'A Red List of Benin’s sharks',
      'Crveni popis lišajeva Hrvatske',
      'Červené seznamy',
      'Coleoptera (Beetle) – Invertebrate Ireland Online',
      'Tricoptera (Caddisfly) – InvertebrateIreland Online',
      'Les mammifères de la Côte d’Ivoire',
      'Rongeurs et insectivores de Côte d’Ivoire, leur habitat et leur statut de conservation',
      'Красная книга Азербайджанской Республики',
      'Красная книга Узбекистана'
    ) ~ 2024,
    .default = year)) %>% 
  # select columns
  select(id, continent, 
         gadm_level_0 = country, gadm_level_1, gadm_level_2,
         region_custom, region_detail, iso_2, iso_3,
         taxa=group, kingdom, phylum, class, order, family, 
         url = url_clean, source_name= name_orig,
         language, year) 

# Show a random sample of 50 cleaned records, ordered by id
metadata %>%
  slice_sample(n = 50) %>%
  arrange(id) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
id continent gadm_level_0 gadm_level_1 gadm_level_2 region_custom region_detail iso_2 iso_3 taxa kingdom phylum class order family url source_name language year
35 Europe Austria AT AUT Birds Animalia Chordata Aves https://www.umweltbundesamt.at/fileadmin/site/themen/naturschutz/rl_voegel_2017.xlsx Liste für den Vogelschutz prioritärer Brutvögel (1. Fassung) Deutsch 2017
116 Europe Austria Niederösterreich AT AUT Lampreys Animalia Chordata https://noe.gv.at/noe/Naturschutz/RL_Fische.pdf Rote Listen ausgewählter Tiergruppen Niederösterreichs - Fische und Neunaugen (Pisces, Cyclostomata), Deutsch 1996
196 Europe Croatia HR HRV Crustaceans Animalia Arthropoda https://www.haop.hr/sites/default/files/uploads/dokumenti/03_prirodne/crvene_knjige_popisi/Crveni_popis_rakova_web.pdf Crveni popis slatkovodnih rakova Hrvatske Croatian 2013
286 Europe Switzerland CH CHE Orthoptera Animalia Arthropoda Insecta Orthoptera https://www.bafu.admin.ch/bafu/de/home/themen/biodiversitaet/publikationen-studien/publikationen/rote-liste-heuschrecken.html Rote Liste der gefährdeten Arten der Schweiz: Heuschrecken German 2007
310 Europe Spain Islas Canarias ES ESP Flora Plantae https://bibdigital.rjb.csic.es/records/item/15611-libro-rojo-de-especies-vegetales-amenazadas-de-las-islas-canarias?offset=1 Libro rojo de especies vegetales amenazadas de las Islas Canarias Spanish 1996
351 Africa Réunion RE REU Birds Animalia Chordata Aves https://inpn.mnhn.fr/espece/listerouge/FR/Oiseaux_Reunion_2010 Liste rouge des Oiseaux de La Réunion (France) French 2010
360 North America Guadeloupe GP GLP Bees Animalia Arthropoda Insecta Hymenoptera https://inpn.mnhn.fr/espece/listerouge/FR/Abeilles_Guadeloupe_2021 Liste rouge des abeilles de la Guadeloupe (France) French 2021
368 North America Guadeloupe GP GLP Molluscs Animalia Mollusca https://inpn.mnhn.fr/espece/listerouge/FR/Mollusques_terrestres_et_eau_douce_Guadeloupe_2021 Liste rouge des mollusques terrestres et d'eau douce de la Guadeloupe (France) French 2021
431 Europe France Centre FR FRA Breeding Birds Animalia Chordata Aves https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Centre-Val de Loire/6-oiseaux-nicheurs_2013_cle047818.pdf Liste rouge des Oiseaux nicheurs de la région Centre French 2013
480 Europe France Normandy FR FRA Mantodea Animalia Arthropoda Insecta Mantodea https://www.anbdd.fr/publication/liste-rouge-des-orthopteres-mantes-et-phasmes-de-normandie/ Liste rouge des Orthoptères de Normandie French 2022
503 Europe France Provence-Alpes-Côte D'azur FR FRA Butterflies Animalia Arthropoda Insecta Lepidoptera https://www.paca.developpement-durable.gouv.fr/listes-rouges-regionales-a7296.html?lang=fr Liste rouge régionale des papillons de Provence-Alpes-Côte d'Azur French 2014
562 Europe France Limousin FR FRA Flora Plantae https://inpn.mnhn.fr/docs/LR_FCE/LR_regionale/Limousin/LRR_Flore_Limousin.pdf La liste rouge de la flore vasculaire du Limousin French 2013
772 Europe Sweden Västmanland SE SWE Flora Plantae https://artfakta.se/sok SLU Artdatabanken (2020). Rödlista 2020 Swedish 2020
860 Europe Germany DE DEU Reptiles Animalia Chordata Reptilia https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html Rote Liste und Gesamtartenlisteder Reptilien (Reptilia) Deutschlands German 2020
925 Europe Germany DE DEU Staphylinidae Animalia Arthropoda Insecta Coleoptera Staphylinidae https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html Rote Liste und Gesamtartenliste der Kurzflüglerartigen, Stutzkäferartigen, landbewohnenden Kolbenwasserkäfer und Ufer-Kugelkäfer (Coleoptera: Polyphaga: Staphylinoidea, Histeroidea, Hydrophiloidea partim; Myxophaga: Sphaeriusidae) Deutschlands German 2021
966 Europe Germany DE DEU Lichens Plantae https://www.rote-liste-zentrum.de/en/Download-Vertebrates-1874.html Rote Liste und Artenverzeichnis der Flechten und flechtenbewohnenden Pilze Deutschlands German 2011
998 Europe Germany Baden-Württemberg DE DEU Grasshoppers Animalia Arthropoda Insecta Orthoptera https://www.lubw.baden-wuerttemberg.de/natur-und-landschaft/rote-listen Rote Liste und kommentiertes Verzeichnis der Heuschrecken und Fangschrecken Baden-Württembergs German 2019
1302 Europe Germany Hamburg DE DEU Zygaenidae Animalia Arthropoda Insecta Lepidoptera Zygaenidae https://www.hamburg.de/politik-und-verwaltung/behoerden/bukea/schmetterlinge-932356 Schutzprogramm für Tagfalter und Widderchen in Hamburg 1983 German 1983
1337 Europe Germany Hessen DE DEU Butterflies Animalia Arthropoda Insecta Lepidoptera https://www.hlnug.de/themen/naturschutz/rote-listen Rote Liste der Tagfalter German 2009
1583 Europe Germany Sachsen-Anhalt DE DEU Stoneflies Animalia Arthropoda Insecta Plecoptera https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2045 Rote Listen Sachsen-Anhalt 2020 German 2020
1594 Europe Germany Sachsen-Anhalt DE DEU Cicindelidae Animalia Arthropoda Insecta Coleoptera Carabidae https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2042 Rote Listen Sachsen-Anhalt 2020 German 2020
1604 Europe Germany Sachsen-Anhalt DE DEU Melyridae Animalia Arthropoda Insecta Coleoptera Melyridae https://lau.sachsen-anhalt.de/alt-vor-neuer-navigation/wir-ueber-uns-publikationen/fachpublikationen/berichte-des-lau/rote-listen-sachsen-anhalt-2046 Rote Listen Sachsen-Anhalt 2020 German 2020
1655 Europe Germany Sachsen-Anhalt DE DEU Fishes Animalia Chordata https://lau.sachsen-anhalt.de/naturschutz/arten-und-biotopschutz/berichte-lau-heft-39-2004-rote-liste Rote Listen Sachsen-Anhalt 2004 German 2004
1694 Europe Germany Sachsen-Anhalt DE DEU Lissomidae Animalia Arthropoda Insecta Coleoptera Elateridae https://lau.sachsen-anhalt.de/naturschutz/arten-und-biotopschutz/berichte-lau-heft-39-2004-rote-liste Rote Listen Sachsen-Anhalt 2004 German 2004
1712 Europe Germany Sachsen-Anhalt DE DEU Scorpionflies Animalia Arthropoda Insecta Mecoptera https://lau.sachsen-anhalt.de/naturschutz/arten-und-biotopschutz/berichte-lau-heft-39-2004-rote-liste Rote Listen Sachsen-Anhalt 2004 German 2004
1846 Europe Germany Thüringen DE DEU Pediciidae Animalia Arthropoda Insecta Diptera Pediciidae https://tlubn.thueringen.de/naturschutz/rote-listen/sonstige-wirbellose Rote Liste der Stelzmucken (Diptera: Limoniidae et Peticiidae) Thuringens 2001 German 2001
1994 North America Dutch Caribbean St. Eustatius | Aruba | Saba | Curacao | Saba bank | St. Maarten | Bonaire BQ | AW | BQ | CW | N/A | SX | BQ BES | ABW | BES | CUW | N/A | SXM | BES Flora Plantae https://www.dcbd.nl/resource/2773 Conservation species for the Dutch Caribbean (2020) English 2020
2066 Africa Kenya KE KEN Endemic Freshwater Fishes Animalia Chordata https://www.biodev2030.org/wp-content/uploads/2021/07/Annexe-33_Rapport-Final_National-Biodiversity-Threat-Assessment_Kenya.pdf Kenya National Biodiversity Threat Assessment English 2020
2119 Asia South Korea KR KOR Reptiles Animalia Chordata Reptilia https://ecolibrary.me.go.kr/nibr/#/search/detail/5686432?offset=19 Red Data Book of Republic of Korea - Reptiles Korean 2019
2267 Oceania Australia South Australia Adelaide & Mt Lofty Ranges AU AUS Fauna Animalia https://cdn.environment.sa.gov.au/environment/docs/amlr-fauna-conservation-assessments-data-gen.pdf Adelaide & Mt Lofty Ranges Fauna English 2013
2269 Oceania Australia South Australia SA Arid Lands (Outback) AU AUS Fauna Animalia https://cdn.environment.sa.gov.au/environment/docs/saal-fauna-conservation-assessments-data-gen.pdf SA Arid Lands (Outback) Fauna English 2013
2393 Europe Netherlands NL NLD Reptiles Animalia Chordata Reptilia https://www.ravon.nl/Portals/2/Bestanden/Publicaties/Rapporten/2021.043.pdf Basisrapport Rode Lijst amfibieën en reptielen volgens Nederlandse en IUCN-criteria Dutch 2023
2412 Europe Netherlands NL NLD Mayflies Animalia Arthropoda Insecta Ephemeroptera https://nl.wikipedia.org/wiki/Nederlandse_Rode_Lijst_(steenvliegen) Nederlandse Rode Lijst (haften) Dutch 2004
2429 Europe Estonia EE EST Fungi Fungi https://ojs.utlib.ee/index.php/FCE/article/download/fce.2019.56.12/10468 Red List of Estonian Fungi – 2019 update English 2019
2480 Europe Poland PL POL Freshwater Gastropods Animalia Mollusca Gastropoda https://rcin.org.pl/iop/dlibra/publication/97321/edition/115551 Czerwona lista zwierząt ginących i zagrożonych w Polsce: Ślimaki wodne Gastropoda aquatica Polish 2002
2499 Europe Poland PL POL Arachnida Animalia Arthropoda Arachnida https://rcin.org.pl/iop/dlibra/publication/97321/edition/115551 Czerwona lista zwierząt ginących i zagrożonych w Polsce: Arachnida Pajęczaki Polish 2002
2625 North America Honduras HN HND Reptiles Animalia Chordata Reptilia https://cdb.chmhonduras.org/phocadownloadpap/LISTAS_ROJAS/Lista Roja de Especies Amenazadas de Honduras - 2022_REV4_181223.pdf Lista Roja de Especies Amenazadas de Honduras Spanish 2022
2633 South America Bolivia Tierras Bajas BO BOL Flora Plantae https://drive.google.com/file/d/1kHQ-HC1STepkNLhLjuoFbTZ5tlsTA7kI/view LIBRO ROJO DE PLANTAS AMENAZADAS DE LAS TIERRAS BAJAS DE BOLIVIA Spanish 2020
2665 South America Colombia CO COL Reptiles Animalia Chordata Reptilia https://www.minambiente.gov.co/wp-content/uploads/2021/10/Libro-Rojo-de-Reptiles-de-Colombia-2015.pdf Libro Rojo de Reptiles de Colombia Spanish 2015
2735 Africa South Africa ZA ZAF Mammals Animalia Chordata Mammalia https://ewt.org.za/resources/mammal-red-list/ The Red List of Mammals of South Africa, Swaziland and Lesotho 2004 English 2004
2756 Africa Eswatini SZ SWZ Reptiles Animalia Chordata Reptilia https://www.sanbi.org/wp-content/uploads/2024/06/2023_Suricata10.pdf Conservation status of the Reptiles of South Africa, Eswatini and Lesotho English 2023
2767 Africa South Africa Table Mountain National Park ZA ZAF Spiders Animalia Arthropoda Arachnida Araneae https://www.researchgate.net/publication/378888951_Checklist_of_the_spiders_Arachnida_Araneae_of_the_Table_Mountain_National_Park_South_Africa Checklist of the spiders (Arachnida, Araneae) of theTable Mountain National Park, South Africa English 2024
2768 Africa South Africa ZA ZAF Medicinal Plants Plantae https://www.researchgate.net/publication/236031408_Red_Listed_medicinal_plants_of_South_Africa_Status_trends_and_assessment_challenges Red Listed medicinal plants of South Africa: Status, trends, and assessment challenges English 2013
2845 Asia Bangladesh BD BGD Birds Animalia Chordata Aves https://portals.iucn.org/library/node/46324 Red List of Bangladesh : volume 3 : birds English 2015
2854 Asia Bangladesh BD BGD Birds Animalia Chordata Aves https://portals.iucn.org/library/node/7789 Red Book of threatened birds of Bangladesh English 2000
2977 Asia Japan JP JPN Algae Plantae https://ikilog.biodic.go.jp/Rdb/booklist レッドリスト2006 Japanese 2006
3025 Asia Iran IR IRN Amphibians Animalia Chordata Amphibia https://www.researchgate.net/publication/370608014_A_synoptic_review_of_the_Amphibians_of_Iran_bibliography_taxonomy_synonymy_distribution_conservation_status_and_identification_key_to_the_eggs_larvae_and_adults A synoptic review of the Amphibians of Iran: bibliography, taxonomy, synonymy, distribution, conservation status, and identification key to the eggs, larvae, and adults English 2023
3052 Asia Russia Altay RU RUS Flora Plantae https://www.plantarium.ru/lang/en/page/redbook/id/264.html Красная книга Алтайского края. Редкие и находящиеся под угрозой исчезновения виды растений и грибов. Т. 1. Барнаул, 2016. Russian 2016
3197 Europe Russia Perm' RU RUS Flora Plantae https://www.plantarium.ru/lang/en/page/redbook/id/263.html Красная книга Среднего Урала (Свердловская и Пермская области): Редкие и находящиеся под угрозой исчезновения виды животных и растений. Екатеринбург, 1996. Russian 1996
3236 Europe | Asia USSR USSR Flora Plantae https://www.plantarium.ru/lang/en/page/redbook/id/125.html Красная книга СССР: редкие и находящиеся под угрозой исчезновения виды животных и растений. Издание 2-е, переработанное и дополненное. Т. 2. Москва, 1984. Russian 1984

Data audit

Summary

Code
# One-column overview of the cleaned metadata: record, source and taxa counts,
# records per kingdom, and the number of countries and sub-national units.
metadata %>% 
  summarise(`Number of records` = n(),
            `Number of sources` = n_distinct(source_name),
            `Number of taxa` = n_distinct(taxa),
            # na.rm so that a record without kingdom cannot turn a count into NA
            `Animalia records` = sum(kingdom == 'Animalia', na.rm = TRUE),
            `Plantae records` = sum(kingdom == 'Plantae', na.rm = TRUE),
            `Fungi records` = sum(kingdom == 'Fungi', na.rm = TRUE),
            # n_distinct() counts NA as a value by default; records without a
            # country or sub-national unit must not add one to the totals
            Countries = n_distinct(gadm_level_0, na.rm = TRUE),
            `Sub-national territories` = n_distinct(gadm_level_1, na.rm = TRUE)) %>% 
  # transpose to a single column named 'N'
  t() %>% `colnames<-`(c("N")) %>% 
  kableExtra::kbl(booktabs = TRUE)  %>% 
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
N
Number of records 3200
Number of sources 2093
Number of taxa 487
Animalia records 2190
Plantae records 898
Fungi records 105
Countries 172
Sub-national territories 223

Geographic coverage

Code
# Country polygons (GADM level 0), downloaded to / read from 'data/'
world <- geodata::world(resolution = 3, level = 0, path = 'data/')

# Number of distinct sources per country polygon.
# Records covering several countries store their ISO-3 codes as 'AAA | BBB';
# the separator pattern also consumes the spaces around '|' so that each code
# matches `GID_0` exactly after splitting.
world_records <- left_join(st_as_sf(world), metadata %>%
  separate_rows(iso_3, sep = '\\s*\\|\\s*') %>% 
  select(iso_2, GID_0=iso_3, source_name),
  by = 'GID_0') %>% 
  group_by(GID_0, NAME_0) %>% 
  # countries without any matching record get 0 sources and no ISO-2 string
  summarise(n_records = n_distinct(source_name, na.rm = TRUE),
            iso_2_string = ifelse(n_records>0,
                                  paste(iso_2, collapse = ';'), NA))

# Figure 1: world map of the number of sources per country.
# Countries with zero sources are set to NA so they are drawn with the
# `value.na` colour and labelled '0' in the legend.
plot_figure_1 <-
  tm_shape(
    world_records %>%
      select(-iso_2_string) %>%
      mutate(n_records = ifelse(n_records == 0, NA, n_records))
  ) +
  tm_polygons(
    fill = 'n_records',
    fill_alpha = 0.9,
    col = 'grey40',
    col_alpha = 0.2,
    fill.scale = tm_scale_intervals(
      n = 6,
      # style = 'jenks',
      breaks = c(1, 5, 10, 20, 100, 979),
      values = 'brewer.reds',
      value.na = 'grey80',
      label.na = '0'
    ),
    fill.legend = tm_legend(
      item.space = 0,
      item.na.space = 0,
      title = 'Number of sources',
      reverse = TRUE,
      # frame = FALSE,
      frame.lwd = 0.1,
      bg.color = 'white'
    )
  ) +
  tm_layout(
    legend.outside = TRUE,
    legend.position = c('left', 'bottom'),
    frame = FALSE
  ) +
  tm_crs(property = 'global')

# Switch tmap to 'plot' mode and draw the map
tmap_mode('plot')
plot_figure_1

Code
# Switch tmap to 'view' mode and draw the same map again
tmap_mode('view')
plot_figure_1

Taxonomic coverage

Code
# Five animal classes with the most records (records without class excluded)
metadata %>%
  filter(kingdom %in% c('Animalia'), !is.na(class)) %>%
  count(kingdom, class, name = 'n_sources_taxa') %>%
  arrange(desc(n_sources_taxa)) %>%
  slice_head(n = 5) %>%
  rename(`Number of sources` = n_sources_taxa) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom class Number of sources
Animalia Insecta 854
Animalia Mammalia 202
Animalia Aves 185
Animalia Reptilia 148
Animalia Amphibia 133
Code
# Five plant orders with the most records (records without order excluded)
metadata %>%
  filter(kingdom %in% c('Plantae'), !is.na(order)) %>%
  count(kingdom, order, name = 'n_sources_taxa') %>%
  arrange(desc(n_sources_taxa)) %>%
  slice_head(n = 5) %>%
  rename(`Number of sources` = n_sources_taxa) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
kingdom order Number of sources
Plantae Charales 29
Plantae Asparagales 5
Plantae Caryophyllales 2
Plantae Malvales 2
Plantae Apiales 1
Code
# fish sources: distinct source names whose taxa mention 'fish'
# (case-insensitive), leaving out any taxa that mention 'crayfish'
metadata %>%
  filter(
    grepl('fish', taxa, ignore.case = TRUE),
    !grepl('crayfish', taxa, ignore.case = TRUE)
  ) %>%
  distinct(source_name) %>%
  count() %>%
  rename(`Number of fish sources` = n) %>%
  kableExtra::kbl(booktabs = TRUE) %>%
  kableExtra::kable_styling(latex_options = c('striped', 'hold_position'))
Number of fish sources
102
Code
# Bar chart of the number of `metadata` rows per animal phylum,
# with bars ordered from the largest count to the smallest.
metadata %>%
  filter(!is.na(phylum), kingdom %in% c('Animalia')) %>%
  group_by(kingdom, phylum) %>%
  summarise(n_sources_taxa = n()) %>%
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Rotate the phylum labels so they stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Code
# Bar chart of the number of `metadata` rows per phylum for plants and fungi,
# one facet per kingdom, with bars ordered from largest to smallest count.
metadata %>%
  filter(!is.na(phylum), kingdom %in% c('Plantae', 'Fungi')) %>%
  group_by(kingdom, phylum) %>%
  summarise(n_sources_taxa = n()) %>%
  ggplot(aes(x = reorder(phylum, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Rotate the phylum labels so they stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Code
# Bar chart of the number of `metadata` rows per animal order,
# with bars ordered from the largest count to the smallest.
metadata %>%
  filter(!is.na(order), kingdom %in% c('Animalia')) %>%
  group_by(kingdom, order) %>%
  summarise(n_sources_taxa = n()) %>%
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Rotate the order labels so they stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Code
# Bar chart of the number of `metadata` rows per order for plants and fungi,
# one facet per kingdom, with bars ordered from largest to smallest count.
metadata %>%
  filter(!is.na(order), kingdom %in% c('Plantae', 'Fungi')) %>%
  group_by(kingdom, order) %>%
  summarise(n_sources_taxa = n()) %>%
  ggplot(aes(x = reorder(order, -n_sources_taxa), y = n_sources_taxa)) +
  geom_col(fill = "#4CAF50") +
  facet_wrap(~kingdom, scales = 'free') +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean() +
  # Rotate the order labels so they stay readable.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Temporal coverage

Code
# Bar chart of the number of distinct sources (`source_name`) per `year`.
metadata %>%
  group_by(year) %>%
  summarise(publications_year = n_distinct(source_name)) %>%
  ggplot(aes(x = year, y = publications_year)) +
  geom_col(fill = "#4CAF50") +
  ylim(c(0, 150)) +
  scale_x_continuous(n.breaks = 15) +
  # Restrict the visible x range here rather than with xlim(): xlim() adds an
  # x scale that a later scale_x_continuous() replaces, so the 1975-2025 window
  # was being discarded. Zooming via the coord also keeps the bars at the
  # edges of the window instead of censoring them.
  coord_cartesian(xlim = c(1975, 2025)) +
  labs(x = "", y = "Number of sources") +
  ggpubr::theme_pubclean()